HTML.XIL

HTML.XIL is the default indexsheet for HTML content. The HTML.XIL indexsheet includes definitions for common fields, handling of hit-anchors and hit-highlighting, rules that control the indexing of title tags, creation of a table-of-contents structure, and handling of break words.

<?xml version='1.0'?>
<!-- Default indexsheet for HTML -->

    <!-- Root element of the indexsheet. Binds the xsl: prefix to the XSLT
         namespace and the np: prefix to the indexsheet namespace, and
         lists np in extension-element-prefixes so that np: elements are
         treated as extension elements.
         NOTE(review): case-sensitive="no" presumably makes element-name
         matching case-insensitive; confirm against the indexsheet
         reference. -->
    <xsl:stylesheet case-sensitive="no"
                    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                    xmlns:np="http://www.rocketsoftware.com/ns/indexsheet/2.0"
                    extension-element-prefixes="np">
          
        <!-- Field declarations. These are the four text fields that the
             templates in this indexsheet write to: dc:title,
             dc:creator, dc:subject and dc:description. All four share
             the same term-list, proximity and relevance settings. -->
        <np:definitions>
            <field name="dc:title"
                   type="text" term-list="yes" proximity="no" relevance="highest"/>
            <field name="dc:creator"
                   type="text" term-list="yes" proximity="no" relevance="highest"/>
            <field name="dc:subject"
                   type="text" term-list="yes" proximity="no" relevance="highest"/>
            <field name="dc:description"
                   type="text" term-list="yes" proximity="no" relevance="highest"/>
        </np:definitions>

        <!-- META name="description": index the value of its "content"
             attribute into the dc:description field. -->
        <xsl:template match='META[@name="description"]'>
            <np:index-attribute field="dc:description" name="content"/>
        </xsl:template>

        <!-- META name="author": index the value of its "content"
             attribute into the dc:creator field. -->
        <xsl:template match='META[@name="author"]'>
            <np:index-attribute field="dc:creator" name="content"/>
        </xsl:template>

        <!-- META name="keywords": index the value of its "content"
             attribute into the dc:subject field. -->
        <xsl:template match='META[@name="keywords"]'>
            <np:index-attribute field="dc:subject" name="content"/>
        </xsl:template>

          
        <!-- Links: hit-anchors are not allowed inside an HTML "A"
             element that is a link (one carrying HREF), so hit-anchor
             is set to "postpone" for its content.
             NOTE(review): "postpone" presumably defers the anchor until
             after the link; confirm against the indexsheet reference. -->
        <!-- This rule is important enough that it is enforced
             internally for HTML even when the indexsheet omits it. -->
        <xsl:template match='A[attribute(HREF)]'>
            <np:index hit-anchor="postpone">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>
     
        <!-- HEAD: hits can occur here (for example when TITLE or other
             text in the heading is indexed), but neither a hit-anchor
             nor hit highlighting is allowed inside the HTML "HEAD"
             element, so both are switched off. -->
        <xsl:template match="HEAD">
            <np:index hit-hilite="no" hit-anchor="no">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>
     
        <!-- TITLE is excluded from the index (index="no"). Indexing it
             is of little use when the title is the same for every
             document or merely repeats the first heading. -->
        <!-- The HTML "TITLE" element may be indexed instead, provided
             the rule used still disallows both hit-anchor and
             hit-hilite. -->
        <xsl:template match="TITLE">
            <np:index index="no">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>
    
	    <!-- Do not index SCRIPT -->
        <xsl:template match='SCRIPT'>
            <np:index index="no" hit-hilite="no">
                <xsl:apply-templates/>
            </np:index>  
        </xsl:template>
 
     	<!-- Do not index STYLE -->
        <xsl:template match='STYLE'>
            <np:index index="no" hit-hilite="no">
                <xsl:apply-templates/>
            </np:index>  
        </xsl:template>
     
        <!-- Headings H1 through H6 generate the sub-document table of
             contents (TOC) hierarchy. -->
        <!-- The first heading found is used as the document title;
             title-field is set to dc:title. -->
        <xsl:template match="H1|H2|H3|H4|H5|H6">
            <np:index title-field="dc:title" toc-heading="title-HTML">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>
        

        <!-- P: proximity="paragraph" marks each paragraph so that
             paragraph proximity searching and automatic abstract
             generation can use it. -->
        <!-- break-word="yes" keeps words from being stuck together
             when a P element appears without surrounding
             whitespace. -->
        <xsl:template match='P'>
            <np:index break-word="yes" proximity="paragraph">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>

        <!-- BODY: hit-total="yes" places a hit-total replace tag at the
             end of the BODY element. The form generated by that tag is
             required for next/previous hit functionality. -->
        <!-- break-word is also enabled on BODY. -->
        <xsl:template match='BODY'>
            <np:index break-word="yes" hit-total="yes">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>

        <!-- The elements listed below get break-word="yes" so that
             words are not stuck together when these elements are used
             without surrounding whitespace. -->
        <!-- These word-break rules are included by default; they can
             optionally be edited or removed from the indexsheet for
             HTML. -->
        <xsl:template match="ADDRESS|BR|BLOCKQUOTE|BUTTON
                  |CENTER|DD|DT|DIV|FORM|FRAME|HR|IFRAME|IMG|INPUT
                  |ISINDEX|LI|NOFRAMES|NOSCRIPT|NOEMBED|OBJECT
                  |OPTION|PRE|PLAINTEXT|SPACER|TR|TD|TH|TABLE
                  |TEXTAREA|WBR">
            <np:index break-word="yes">
                <xsl:apply-templates/>
            </np:index>
        </xsl:template>
        
    </xsl:stylesheet>